using DataFrames, CSV, LinearAlgebra, Statistics, StatsBase, BlockDiagonals

#read in stuff
bootdir = "C:\\Users\\Garrett\\Documents\\Grad_School\\Papers\\SIUM_JPE\\Replication\\Model\\boot"

temp = CSV.read("$bootdir\\attendance_coll_boot_sample.csv", DataFrame; skipto=1)
attendance_coll = Matrix(temp)

temp = CSV.read("$bootdir\\attendance_noncoll_boot_sample.csv", DataFrame; skipto=1)
attendance_ncoll = Matrix(temp)

temp = CSV.read("$bootdir\\cds_time_boot_sample.csv", DataFrame; skipto=1)
time = Matrix(temp)

temp = CSV.read("$bootdir\\earnings_boot_sample.csv", DataFrame; skipto=1)
earnings = Matrix(temp)

temp = CSV.read("$bootdir\\mig_dynamic_boot_sample.csv", DataFrame; skipto=1)
mig_dynamic = Matrix(temp)

temp = CSV.read("$bootdir\\mig_moments_boot_sample.csv", DataFrame; skipto=1)
mig_moments = Matrix(temp)

temp = CSV.read("$bootdir\\iim_se.csv", DataFrame; skipto=1)
iim = Matrix(temp)

temp = CSV.read("$bootdir\\nlsy_r2_boot_sample.csv", DataFrame; skipto=1)
coll_r2 = Matrix(temp)

###state-level migration inflow, outflow, iim
temp = CSV.read("$bootdir\\outflow_boot_sample.csv", DataFrame; skipto=1)
state_outflow = Matrix(temp)

temp = CSV.read("$bootdir\\inflow_boot_sample.csv", DataFrame; skipto=1)
state_inflow = Matrix(temp).*50 #crank up abit, otherwise these really small numbers result in enormous weights

temp = CSV.read("$bootdir\\state_iim_se.csv", DataFrame; skipto=1)
state_iim = Matrix(temp)

temp = CSV.read("$bootdir\\coll_race_boot_sample.csv", DataFrame; skipto=1)
coll_race = Matrix(temp)

temp = CSV.read("$bootdir\\mig_race_boot_sample.csv", DataFrame; skipto=1)
mig_race = Matrix(temp)

temp = CSV.read("$bootdir\\earnings_ses_v2.csv", DataFrame; skipto=1)
earnings_ses = Matrix(temp)



#function to make inverse variance-covariance matrix of moments
function var_covar(M::Array{Float64, 2})
    n = length(M[1,:]) #length of row
    vcmat = zeros(n , n)
    for i = 1:n
        vcmat[i, i] = cov(M[:,i], M[:,i]) #just do the diagonal. Covariances generally don't change much and can get weird.
    end

    #invert
    vcmat_inv = pinv(vcmat)
    vcmat_inv
end


mat_iim = iim #SD of IIM Chetty estimate
mat_mig_dynamic = var_covar(mig_dynamic)
mat_attendance_coll = var_covar(attendance_coll)
mat_attendance_noncoll = var_covar(attendance_ncoll)
mat_time = copy(iim)
mat_time[1,1] = 7000 #hacky thing here: smoothing of hours makes the spread of this parameter really small, resulting in it dominating
#the objective function. Here I set the weight equal to median weight among other PSID moments.
mat_earnings = var_covar(earnings)
mat_mig_moments = var_covar(mig_moments)
mat_coll_r2 = var_covar(coll_r2)
mat_outflow = var_covar(state_outflow)
mat_inflow = var_covar(state_inflow)
mat_coll_race = var_covar(coll_race)
mat_mig_race = var_covar(mig_race)



#doctor earnings matrix
mat_earnings[1,1] = (1/earnings_ses[1]^2) #mean young earnings, hs
mat_earnings[2,2] = (1/earnings_ses[5]^2) #sd young earnings, hs
mat_earnings[3,3] = (1/earnings_ses[2]^2) #mean old earnings, hs
mat_earnings[4,4] = (1/earnings_ses[6]^2) #sd old earnings, hs
mat_earnings[9,9] = (1/earnings_ses[3]^2) #mean young earnings, coll
mat_earnings[10,10] = (1/earnings_ses[7]^2) #sd young earnings, coll
mat_earnings[11,11] = (1/earnings_ses[4]^2) #mean old earnings, coll
mat_earnings[12,12] = (1/earnings_ses[8]^2) #sd old earnings, coll


mat_iim_state = zeros(50, 50)
for i = 1:50
    mat_iim_state[i, i] = 1/(state_iim[i]^2)
end


W = BlockDiagonal([mat_iim, mat_earnings, mat_time, mat_attendance_noncoll, mat_attendance_coll, mat_mig_moments, mat_mig_dynamic, mat_coll_r2, mat_outflow, mat_inflow, mat_iim_state, mat_coll_race, mat_mig_race])

CSV.write("$bootdir\\weight_matrix.csv", DataFrame(W, :auto), header=false) #write CSV file






####
